import logging
import datetime
import numpy
from itertools import izip

from hydrat.task.sampler import isOneofM

logger = logging.getLogger(__name__)

# Characters that must be backslash-escaped inside a quoted ARFF token.  The
# backslash itself comes first so that the escapes added afterwards are not
# escaped a second time.
_ARFF_ESCAPES = (
  ('\\', '\\\\'),
  ("'", "\\'"),
  ('"', '\\"'),
  ('\t', '\\t'),
  ('\n', '\\n'),
  ('\r', '\\r'),
  ('%', '\\%'),
)

def _arff_quote(value):
  """ Format a relation, attribute or class name as a single ARFF token.

      Names made only of ordinary characters are returned unchanged, so output
      for such names is identical to the unquoted form. A name that is empty,
      is exactly '?', or contains whitespace, a comma, a brace, a quote, a
      backslash or a percent sign is wrapped in single quotes with the special
      characters backslash-escaped.
      @param value Name to format; converted to text with %s formatting
      @return The token to write into the ARFF file
  """
  text = '%s' % (value,)
  escaped = text
  for raw, replacement in _ARFF_ESCAPES:
    escaped = escaped.replace(raw, replacement)
  needs_quotes = (
    escaped != text
    or text in ('', '?')
    or any(char in text for char in ' ,{}')
  )
  if needs_quotes:
    return "'%s'" % escaped
  return text

def arff_export(fileobj, model, density_threshold = 0.10):
  """ Export an ARFF file from a hydrat model

      The file consists of a comment header (including str(model)), a
      @RELATION line built from the model metadata, one nominal 'class'
      attribute, one NUMERIC attribute per feature, and a @DATA section.
      The data is written in sparse ARFF notation when the density of the
      feature matrix (stored entries / total cells) is at or below
      density_threshold, and in dense notation otherwise. An empty feature
      matrix is treated as having density 0. When the model has no class
      vectors, every instance gets the missing-value class '?'.

      @param fileobj File-like object to write to; only write() is used
      @param model hydrat model; this function reads its class_vectors,
                   classlabels, features, feature_vectors and metadata
                   ('dataset', 'feature_name', 'class_name') attributes.
                   feature_vectors is assumed to be a scipy sparse matrix
                   (it must provide shape, size, toarray() and rows with
                   sort_indices(), indices and data)
      @param density_threshold Feature matrix density at or below which we
                               produce a sparse ARFF
      @raise ValueError if the model has class vectors that are not 1-of-M
  """

  if model.class_vectors is not None and not isOneofM(model.class_vectors):
    raise ValueError("Cannot export non 1-of-M problems to ARFF")

  def emit(text=''):
    # One output line per call, newline-terminated
    fileobj.write(text + '\n')

  logger.debug('Exporting model to ARFF')
  # Write header
  emit("% Generated by hydrat (http://hydrat.googlecode.com)")
  emit("%% Model exported from hydrat on %s" % datetime.datetime.now().isoformat())
  emit("%")
  for line in str(model).split('\n'):
    emit("%   " + line)
  emit()

  # Write relation
  name = '%s-%s-%s' % ( model.metadata['dataset'], model.metadata['feature_name'], model.metadata['class_name'] )
  emit("@RELATION %s" % _arff_quote(name))
  emit()

  # Write attributes
  logger.debug('Writing Attributes')
  # The same label tokens are used in the declaration and in the data rows,
  # so a quoted label is always spelled identically in both places.
  label_tokens = [ _arff_quote(label) for label in model.classlabels ]
  emit("@ATTRIBUTE class {%s}" % ','.join(label_tokens))
  for feat in model.features:
    emit("@ATTRIBUTE %-25s NUMERIC" % _arff_quote(feat))
  emit()

  # Write data
  # We automatically decide whether to use a dense or sparse representation based
  # on matrix density
  fvs = model.feature_vectors
  cell_count = fvs.shape[0] * fvs.shape[1]
  # A matrix with no rows or no columns has no cells; avoid dividing by zero
  matrix_density = float(fvs.size) / cell_count if cell_count else 0.0
  logger.debug('Density %.3f, threshold %.3f', matrix_density, density_threshold)

  # One class token per instance, in instance order
  if model.class_vectors is None:
    instance_labels = ['?'] * fvs.shape[0]
  else:
    classlabel_array = numpy.array(label_tokens)
    # Each class vector selects its label(s) from the array; the model was
    # checked to be 1-of-M above, and the first selected label is used.
    instance_labels = ( classlabel_array[cv][0] for cv in model.class_vectors )

  emit("@DATA")
  if matrix_density <= density_threshold:
    logger.debug('Writing data in sparse format')
    for classlabel, fv in izip(instance_labels, fvs):
      fv.sort_indices()
      # Attribute 0 is the class, so feature column i is ARFF attribute i+1
      feature_repr = ','.join( '%d %s' % (i+1, v) for i,v in izip(fv.indices, fv.data) )
      if feature_repr:
        emit('{ 0 %s, %s }' % (classlabel, feature_repr))
      else:
        # No stored features: do not leave a dangling comma after the class
        emit('{ 0 %s }' % classlabel)
  else:
    logger.debug('Writing data in dense format')
    for classlabel, fv in izip(instance_labels, fvs.toarray()):
      emit("%s,%s" % (classlabel, ','.join(map(str,fv))))


